# Copyright (c) 2020 Red Hat, Inc. All rights reserved. This copyrighted
# material is made available to anyone wishing to use, modify, copy, or
# redistribute it subject to the terms and conditions of the GNU General Public
# License v.2 or later.
#
# This program is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
# details.
#
# You should have received a copy of the GNU General Public License along with
# this program; if not, write to the Free Software Foundation, Inc., 51
# Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.

#  noqa: D301
"""
String set pattern.

A string set pattern is describing the strings a set should, or shouldn't
contain. It consists of regular expressions joined or modified by logical
operators.

Each regular expression evaluates to true if the matched set has one or more
strings matching it fully. E.g. this pattern:

    net

Would match a set containing "net". E.g. {"fs", "net", "mem"}, but not {"fs",
"mem"}, nor {"fs", "internet", "mem"}.

And this pattern:

    .*net.*

would match any of {"net"}, {"internet"}, and {"network"}. As well as e.g.
{"mem", "ethernet"}, but not {"neigh"}.

The regular expression evaluation results can be processed using the following
operators (in the order of decreasing precedence):

    "!"         - "not"     - negation
    "&"         - "and"     - conjunction
    "|"         - "or"      - disjunction

Grouping is supported using "(" and ")".

E.g. this pattern:

    kt1&net

would match any set containing both "kt1" and "net". E.g. {"kt1", "net"}, or
{"kt1", "fs", "net"}, but not {"kt1", "fs"}, nor {"fs", "net"}.

This pattern:

    !net

would match any set which doesn't contain "net". E.g. {"kt1", "fs"}, or an
empty set, but not {"net"}, nor {"fs", "net"}.

Finally, this pattern:

    (kt0|kt1)&(fs|net)&!stor

would match any set containing either "kt0" or "kt1" and either "fs" or "net",
but not "stor". E.g. {"kt0", "fs"}, or {"kt1", "net"}, or {"kt0", "net"}, but
not {"kt0", "kt1"}, nor {"fs", "net"}, nor {"kt0", "fs", "stor"}.

Any whitespace characters in the pattern will be ignored. E.g. the above
pattern can also be written like this:

    (kt0 | kt1) & (fs | net) & !stor

To include the operators or the whitespace characters into a regular
expression prepend them with a backslash ("\\"). E.g. this pattern:

    quick\\ brown\\ \\(fox\\|bear\\)

would match any set containing either "quick brown fox" or "quick brown bear".

The backslash itself can be included by prepending it with another backslash.
"""
import re

from ply import lex
from ply import yacc

# LEX & YACC conventions rule here, pylint: disable=invalid-name

tokens = ('REGEX', 'NOT', 'AND', 'OR', 'LPAREN', 'RPAREN')


class Error(Exception):
    """A compilation error."""


class VoidRegex(Exception):
    """Regular expression matches none of the available strings."""


def t_REGEX(t):  # noqa: D300,D400,D403
    r'([^\x00-\x20!&|()\\\x7f]|\\[\x09-\x0d\x20-\x7f])+'
    t.value = re.compile(re.sub(r'\\(.)', r'\1', t.value))
    if t.lexer.strings is not None and \
       all(not t.value.fullmatch(s) for s in t.lexer.strings):
        raise VoidRegex(f"Regular expression {t.value.pattern!r} "
                        f"matches none of the available strings: "
                        f"{t.lexer.strings!r}")
    return t


def t_WHITESPACE(_):  # noqa: D300,D400,D403
    r'[\x09-\x0d\x20]+'


t_NOT = r'!'
t_AND = r'&'
t_OR = r'\|'
t_LPAREN = r'\('
t_RPAREN = r'\)'


class InvalidCharacter(Error):
    """Invalid character encountered in pattern."""


def t_error(t):
    """Handle a lexer error."""
    raise InvalidCharacter(f"Illegal character {t.value[0]!r}")


precedence = (('left', 'OR'), ('left', 'AND'), ('right', 'NOT'))


def p_expression_binary(p):  # noqa D205,D300,D400,D403
    '''
    expression : expression OR expression
               | expression AND expression
    '''
    if p[2] == '|':
        p[0] = lambda string_set, left=p[1], right=p[3]: \
                    left(string_set) or right(string_set)
    elif p[2] == '&':
        p[0] = lambda string_set, left=p[1], right=p[3]: \
                    left(string_set) and right(string_set)


def p_expression_unary(p):  # noqa D300,D400,D403
    '''expression : NOT expression'''
    p[0] = lambda string_set, operand=p[2]: not operand(string_set)


def p_expression_parens(p):  # noqa D300,D400,D403
    '''expression : LPAREN expression RPAREN'''
    p[0] = p[2]


def p_expression_regex(p):  # noqa D300,D400,D403
    '''expression : REGEX'''
    p[0] = lambda string_set, regex=p[1]: \
        any(regex.fullmatch(string) for string in string_set)


class InvalidSyntax(Error):
    """Invalid syntax encountered in pattern."""


def p_error(p):
    """Handle a parser error."""
    if p:
        raise InvalidSyntax(f"Syntax error at {p.value!r}")
    raise InvalidSyntax("Syntax error at end of input")


PARSER = yacc.yacc(start='expression', debug=False, write_tables=False)


def compile(pattern, strings=None):
    """
    Compile a string set pattern into a function.

    Args:
        pattern:    The pattern string to compile into a function.
        strings:    A set containing strings one of which any parsed regular
                    expression must match, or None, if regular expressions are
                    allowed to match anything.

    Returns:
        A function accepting a set of strings and returning the boolean result
        of matching the pattern against the specified set.
    """
    assert isinstance(pattern, str)
    assert strings is None or \
        isinstance(strings, set) and all(isinstance(s, str) for s in strings)
    lexer = lex.lex()
    lexer.strings = strings
    return PARSER.parse(pattern, lexer=lexer)
